/**
 * 
 */
package com.attilax.dataspider;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.attilax.core;
import com.attilax.tryX;
import com.attilax.asyn.AsynUtil;
import com.attilax.concur.TaskUtil;
import com.attilax.count.reduce;
import com.attilax.curr.PoolUtil;
import com.attilax.device.CpuUtil;
import com.attilax.device.HardWareUtils;
import com.attilax.device.PerfUtil;
import com.attilax.dsl.VarUtil;
import com.attilax.exception.ExUtil;
import com.attilax.fileTrans.ConnEx;
import com.attilax.img.PngFormatEx;
import com.attilax.img.imgx;
import com.attilax.io.filex;
import com.attilax.io.pathx;
import com.attilax.lang.Global;
import com.attilax.lang.text.strUtil;
import com.attilax.lbs.NoRztEx;
import com.attilax.net.HttpUtil;
import com.attilax.net.websitex;
import com.attilax.util.CfgService;
import com.attilax.util.urlUtil;
import com.attilax.web.UrlX;
import com.google.common.collect.Lists;

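/**
 * Picture spider for a forum-style site: walks the {@code thread0806.php}
 * list pages under the configured host, collects the article links that
 * contain {@code htm_data}, extracts picture URLs from the {@code input} and
 * {@code img} tags of each article and downloads them below
 * {@code picSaveDir}.
 *
 * @author ASIMO
 */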
public class TsaolyoPicSpider extends PicSpider {
	/** Site root; list-page URLs and article URLs are built by prefixing this value. */
	private String startUrl = "http://cl.d7w.biz"; //cl.d7w.biz
	// previous host: "http://www.1024goto.com"
	/** Shared SLF4J logger of this spider. */
	public static final Logger logger = LoggerFactory
			.getLogger(TsaolyoPicSpider.class);
	/** Delay in milliseconds slept before each picture download is submitted; 0 disables the delay. */
	  private AtomicInteger urlSleep=new AtomicInteger(); //mill
//	private AtomicInteger artSleep = new AtomicInteger();; // mill

//	int artPoolTaskCount;
	/** Number of threads of the fixed pool created in exec(). */
	int PoolTaskCount;
	/** Not read by the code in this file. */
	public int pages;
	/** Not read by the code in this file. */
	public int pageStart;
	/** Root directory below which downPic() stores pictures (one sub-directory per article title). */
	public String picSaveDir;
	/** First list-page number produced by getpageUrls() (inclusive). */
	private int startPage;
	/** Last list-page number produced by getpageUrls() (inclusive). */
	private int endPage;
//	public  String startUrl = "http://www.1024goto.com";
	/** Counters for submitted/finished articles and pictures, logged by showRpt(). */
	TaskExeReport rpt = new TaskExeReport();

	/**
	 * Command-line entry point: loads {@code spider.ini} from the application
	 * path, configures a {@link TsaolyoPicSpider} from it and runs the crawl.
	 *
	 * <p>Settings read: {@code dbg_sleep}, {@code startPage}, {@code endPage}
	 * and the per-machine keys {@code <sn>_pic_sleep} and
	 * {@code <sn>_pic_run_max}, where {@code <sn>} is the lower-cased value
	 * returned by {@code HardWareUtils.getSn()}. Every value is trimmed before
	 * it is parsed, so stray whitespace in the ini file is tolerated for all
	 * keys alike. The picture directory is hard-coded.
	 *
	 * @author attilax
	 * @param args ignored
	 * @throws IllegalArgumentException if a required setting is missing or is
	 *         not an integer
	 * @since p17 d_b_0
	 */
	public static void main(String[] args) {
		System.out.println("--tt22");

		TsaolyoPicSpider x = new TsaolyoPicSpider();
		String cfg = filex.read(pathx.appPath_webPrjMode() + "/spider.ini");
		CfgService cs = new CfgService().read(cfg);
		Map cfg_map = cs.m;
		String sn = new HardWareUtils().getSn().toLowerCase();

		// start-up delay taken from the configuration
		TaskUtil.sleep_sec_throwEx(readIntSetting(cfg_map, "dbg_sleep"));

		x.urlSleep.set(readIntSetting(cfg_map, sn + "_pic_sleep"));
		x.PoolTaskCount = readIntSetting(cfg_map, sn + "_pic_run_max");

		x.picSaveDir = "c:\\0picSaveDir";
		x.startPage = readIntSetting(cfg_map, "startPage");
		x.endPage = readIntSetting(cfg_map, "endPage");

		x.exec();

		logger.info("--fi0055667r788");
	}

	/**
	 * Reads the setting {@code key} from {@code cfg} as an int, ignoring
	 * surrounding whitespace.
	 *
	 * @throws IllegalArgumentException if the key is absent or its value is
	 *         not an integer
	 */
	private static int readIntSetting(Map<?, ?> cfg, String key) {
		Object raw = cfg.get(key);
		if (raw == null) {
			throw new IllegalArgumentException("spider.ini: missing setting '" + key + "'");
		}
		try {
			return Integer.parseInt(raw.toString().trim());
		} catch (NumberFormatException e) {
			throw new IllegalArgumentException(
					"spider.ini: setting '" + key + "' is not an integer: " + raw, e);
		}
	}

	/**
	 * Manual smoke test, not called from {@code main}: runs the list parser,
	 * the relative-URL resolver, the picture extractor and one download
	 * against fixed local HTML files and fixed URLs.
	 *
	 * @param x spider instance to exercise
	 */
	private static void t2(TsaolyoPicSpider x) {
		String listPageHtml = filex.read("c:\\new 36.html", "utf8");
		x.getArtListByPagehtml(listPageHtml);

		String resolved = x.getAbsUrlPic(
				"http://view.news.qq.com/original/intouchtoday/n3660.html",
				"./art.net_files/22088668b119b1691940c03f61ef6ea5a08094fc.jpg");
		logger.info(resolved);

		String articleHtml = filex.read("c:\\art.net.html", "gbk");
		System.out.println(x.getPics_byHtml(articleHtml,
				"http://cl.cmcher.com/htm_data/16/1609/2082995.html"));

		x.downPic(
				"http://img03.cweb-pix.com/images/2016/02/18/220886248d19f404ee8f601798bea7e4edb4377d.jpg",
				"tt");
	}

	/** Only referenced from commented-out code in this file. */
	filex fx;
	/** Not read by the code in this file. */
	public String fileName;
	/** Not read by the code in this file. */
	public String kw;
	// private ExecutorService picPool;
	/** Not read by the code in this file. */
	private int min_artSleep = 300;
	/** Only referenced from commented-out code in this file. */
	PerfUtil perfUtil = new PerfUtil();

	/**
	 * Runs the whole crawl: creates the fixed-size download pool
	 * ({@code PoolTaskCount} threads), publishes it as {@code urlPool} and
	 * processes every list page returned by {@link #getpageUrls()} in order.
	 *
	 * <p>A failure on one list page is logged and the next page is still
	 * processed. Once all pages have been walked the pool is shut down in an
	 * orderly way: downloads that were already submitted still run, but the
	 * worker threads end afterwards instead of being leaked. All submissions
	 * happen synchronously inside {@code exec_singlePage}, so no task is
	 * rejected by the shutdown.
	 *
	 * @return always {@code null}
	 */
	public String exec() {
		ExecutorService taskPool = Executors.newFixedThreadPool(this.PoolTaskCount,
				new MyThreadFactory("task_pool"));
		logger.info("--start now");
		urlPool = taskPool;

		try {
			int n = 1;
			for (String listUrl : getpageUrls()) {
				this.nowPageIdx = n;
				try {
					exec_singlePage(listUrl);
				} catch (Exception e) {
					// keep crawling the remaining list pages
					logger.error("--list page failed, url:" + listUrl, e);
				}
				n++;
			}
		} finally {
			// lets queued downloads finish, then releases the worker threads
			taskPool.shutdown();
		}

		return null;
	}

	/** 1-based position of the list page currently being processed; set by exec(). */
	int nowPageIdx;
	/** Not written or read by the code in this file. */
	int nowArtIdx;
	/** Not written or read by the code in this file. */
	int nowpicIdx;
	/** Pool that runs the picture downloads; assigned in exec() and used by processArt(). */
	ExecutorService urlPool;

	/**
	 * Fetches one list page and processes every article linked from it, one
	 * after another on the calling thread.
	 *
	 * <p>A failure while processing a single article is printed and does not
	 * stop the remaining articles. The article counters in {@code rpt} are
	 * updated and a progress report is triggered after each article.
	 *
	 * @author attilax
	 * @param pageUrl URL of the list page; fetched with the {@code "gbk"}
	 *        charset argument
	 * @since p17 g_37_c
	 */
	private void exec_singlePage(String pageUrl) {
		System.out.println("-- will url:"+pageUrl);

		String html = null;
		try {
			websitex fetcher = new websitex();
			fetcher.refer = "http://www.czvv.com/";
			html = fetcher.WebpageContent(pageUrl, "gbk", 60);
		} catch (Exception fetchError) {
			ExUtil.throwExV3(fetchError, "-- url:" + pageUrl);
		}

		List<String> articleUrls = getArtListByPagehtml(html);
		for (String artUrl : articleUrls) {
			logger.info("--now start process url is :" + artUrl);
			try {
				rpt.sumbit_arts_count.incrementAndGet();
				try {
					processArt(artUrl);
				} catch (Exception articleError) { // printed only, crawl goes on
					new RuntimeException("--url:" + artUrl, articleError)
							.printStackTrace();
				} finally {
					rpt.nowIndex_article.incrementAndGet();
					showRpt();
				}
			} catch (Exception unexpected) {
				new RuntimeException("--url:" + artUrl, unexpected).printStackTrace();
			}
		}
	}

	/**
	 * Hands the progress report to {@code TaskUtil.asyn} under a unique label
	 * (presumably so that logging runs off the calling thread; confirm
	 * against {@code TaskUtil}).
	 */
	private void showRpt() {
		String label = "perfinfo_thread in showRpt()"+filex.getUUidName();
		TaskUtil.asyn(() -> {
			showRpt_asyn_wrap();
		}, label);
	}

	/**
	 * Logs one progress line: finished/submitted pictures followed by
	 * finished/submitted articles, read from the counters in {@code rpt}.
	 */
	private void showRpt_asyn_wrap() {
		int finishedPics = rpt.nowIndex_alreadyPicCount.get();
		StringBuilder line = new StringBuilder("--***showRpt pic/picSubmit: ");
		line.append(finishedPics);
		line.append("/");
		line.append(rpt.sumbit_pic_count.get());
		line.append(",,now art/artSubmit:");
		line.append(rpt.nowIndex_article.get());
		line.append("/");
		line.append(rpt.sumbit_arts_count.get());
		logger.info(line.toString());
	}

	/**
	 * Processes one article: fetches its HTML, reads its title, extracts the
	 * picture URLs and submits one download task per picture to
	 * {@code urlPool}.
	 *
	 * <p>Before each submission the thread sleeps {@code urlSleep}
	 * milliseconds (skipped when 0). If the thread is interrupted while
	 * sleeping, the interrupt flag is restored and the remaining pictures of
	 * this article are not submitted. Any other failure for a picture is
	 * logged and the next picture is tried.
	 *
	 * @param artUrl absolute URL of the article page
	 */
	private void processArt(String artUrl) {
		String html = getArtHtml(artUrl);
		Map a = new ArticleService().process(html);
		String title = (String) a.get("title");
		List<String> li = getPics_byHtml(html, artUrl);

		for (String picurl : li) {
			try {
				int picSleep = this.urlSleep.get();
				if (picSleep != 0) {
					Thread.sleep(picSleep);
				}
				rpt.sumbit_pic_count.incrementAndGet();
				logger.warn( "--tagqa1 pic_sumbit:"+picurl);

				// NOTE(review): this is set on the submitting thread, not on
				// the pool worker that runs the download - confirm intent.
				Global.dbName.set("thrd--"+picurl);

				// The executor runs the Runnable on its own worker threads;
				// wrapping it in a never-started Thread adds nothing.
				urlPool.execute(() -> {
					try {
						downPic(picurl, title);
					} catch (Exception e) {
						logger.error("--url:" + picurl, e);
					} finally {
						rpt.nowIndex_alreadyPicCount.incrementAndGet();
						logger.warn( "--tagqa1 pic_finish :"+picurl);
						showRpt();
					}
				});
			} catch (InterruptedException e) {
				// keep the interrupt visible to callers and stop submitting
				Thread.currentThread().interrupt();
				logger.warn("--interrupted, stop submitting pics of:" + artUrl);
				return;
			} catch (Exception e) {
				logger.error("--url:" + picurl, e);
			}
		}
	}
//	public static ThreadLocal<String> thrdname=new ThreadLocal<>();

	/**
	 * Downloads the HTML of an article page.
	 *
	 * @param artUrl absolute URL of the article
	 * @return the page content, or an empty string if the fetch timed out and
	 *         {@code ExUtil.throwExV2} returned normally
	 */
	private String getArtHtml(String artUrl) {
		websitex fetcher = new websitex();
		fetcher.refer = "http://www.czvv.com/";
		try {
			return fetcher.WebpageContent(artUrl, "gbk", 60);
		} catch (TimeoutException timeout) {
			ExUtil.throwExV2(timeout, "--url:" + artUrl);
		}
		return "";
	}

	/**
	 * abs http://cl.cmcher.com/htm_data/16/1609/2082995.html
	 * 
	 * @author attilax
	 * @since p17 e_3_r
	 */
	// private Object getCurPageUrl(int i) {
	// String s="http://www.czvv.com/k"+ Base64. encode(kw,"utf-8")
	// +"p@pagec0cc0s0m0e0f0d0.html".replaceAll("@page", String.valueOf(i-1));
	// return s;
	// }

	/*
	 * attilax, 2016-09-27 16:38:55 (orphaned comment: no declaration follows it)
	 * 
	 * @param artUrl
	 * @return
	 */

	/**
	 * Downloads one picture into
	 * {@code picSaveDir/<encoded title>/<encoded file name>}.
	 *
	 * <p>The file name gets a {@code .jpg} suffix appended unless the URL's
	 * extension is already jpg, jpeg or png. A file already at the target
	 * path is first checked by {@code delDownHalfFile}; if it is still there
	 * afterwards the download is skipped. GIF URLs are skipped as well. The
	 * same check runs again after the download attempt, whether or not it
	 * succeeded.
	 *
	 * @param picurl absolute URL of the picture
	 * @param title article title, used as the sub-directory name
	 */
	public void downPic(String picurl, String title) {
		logger.info("--exe downpic,title : " + title + ",,," + picurl);

		String name2 = filex.getFileName(picurl);
		String normalizedExt = filex.getExtName(picurl).trim().toLowerCase();
		// an empty or over-long extension is never one of the three accepted
		// ones, so a single membership test covers all original branches
		boolean keepsExtension = normalizedExt.equals("jpg")
				|| normalizedExt.equals("jpeg")
				|| normalizedExt.equals("png");
		if (!keepsExtension) {
			name2 = name2 + ".jpg";
		}

		String subDir = filex.fileNameEncode(title);
		name2 = filex.fileNameEncode(name2);
		String savepath = this.picSaveDir + "\\" + subDir + "\\" + name2;

		delDownHalfFile(savepath);
		// already downloaded completely, or a gif: nothing to do
		if (new File(savepath).exists()
				|| picurl.trim().toLowerCase().endsWith(".gif")) {
			return;
		}

		try {
			new HttpDownloader().down(picurl, savepath, 90);
		} catch (Exception downloadError) {
			ExUtil.throwExV2(downloadError, "--url:" + picurl);
		} finally {
			try {
				delDownHalfFile(savepath);
			} catch (Exception cleanupError) {
				cleanupError.printStackTrace();
			}
		}
	}

	/**
	 * Moves an existing file at {@code savepath} out of the way, into a fresh
	 * sub-directory of the fixed "downEx" directory, when it looks broken:
	 * either {@code imgx.GrayLinePercent} reports more than 10, or it throws
	 * {@code PngFormatEx}. Does nothing when the file does not exist.
	 * (Presumably the gray-line percentage detects half-downloaded images;
	 * confirm against {@code imgx}.)
	 *
	 * @param savepath path of the downloaded picture to check
	 */
	private void delDownHalfFile(String savepath) {
		String quarantineDir = "c:\\0picSaveDir_downEx\\" + filex.getUUidName()+"\\" + filex.getUUidName();
		if (!new File(savepath).exists()) {
			return;
		}

		boolean broken;
		try {
			broken = new imgx().GrayLinePercent(savepath) > 10;
		} catch (PngFormatEx formatError) {
			broken = true;
		}

		if (broken) {
			filex.move(savepath, quarantineDir, this.picSaveDir);
			logger.info("--delfile:" + savepath);
		}
	}

	/**
	 * Fetches an article and returns the picture URLs found in it.
	 *
	 * @author attilax
	 * @param artUrl absolute URL of the article
	 * @return the picture URLs extracted by {@code getPics_byHtml}
	 * @since p17 d_58_42
	 */
	private List getPics_byArtUrl(String artUrl) {
		return getPics_byHtml(getArtHtml(artUrl), artUrl);
	}

	/**
	 * Builds a picture URL from a relative reference by appending it, after a
	 * slash, to {@code UrlX.getPath(artUrl)}.
	 *
	 * @param artUrl URL of the article the picture was found in
	 * @param pic picture reference taken from the article
	 * @return {@code UrlX.getPath(artUrl) + "/" + pic}
	 */
	public String getAbsUrlPic(String artUrl, String pic) {
		String basePath = UrlX.getPath(artUrl);
		return basePath + "/" + pic;
	}

	/**
	 * Extracts the picture URLs of an article: every {@code input} element
	 * first, then every {@code img} element, each passed through
	 * {@code addPic2li}. A failure on one element is printed and skipped.
	 *
	 * @param html article HTML
	 * @param artUrl URL of the article, used to resolve relative references
	 * @return the collected picture URLs, in document order per tag
	 */
	public List<String> getPics_byHtml(String html, String artUrl) {
		List<String> picUrls = Lists.newArrayList();
		Document doc = Jsoup.parse(html);

		for (String tagName : new String[] { "input", "img" }) {
			for (Element element : doc.getElementsByTag(tagName)) {
				try {
					addPic2li(element, picUrls, artUrl);
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		return picUrls;
	}

	/**
	 * Adds the picture referenced by {@code element}'s {@code src} attribute
	 * to {@code li}, unless it is too short (fewer than 10 characters after
	 * trimming), a GIF, or already in the list.
	 *
	 * <p>The attribute is trimmed once up front. References starting with
	 * {@code http} are cleaned with {@code clrPicUrl}; all others have a
	 * trailing {@code -br-} marker removed and are resolved against
	 * {@code artUrl}. Each URL is added at most once; the relative branch
	 * used to add its URL a second time unconditionally.
	 *
	 * @param element element whose {@code src} attribute is inspected
	 * @param li list receiving the picture URL
	 * @param artUrl URL of the article, used to resolve relative references
	 */
	private void addPic2li(Element element, List<String> li, String artUrl) {
		String pic = element.attr("src").trim();
		if (pic.length() < 10)
			return;

		if (pic.startsWith("http")) {
			// absolute URL
			pic = clrPicUrl(pic);
		} else {
			// relative path
			if (pic.endsWith("-br-")) {
				pic = pic.replace("-br-", "").trim();
			}
			pic = getAbsUrlPic(artUrl, pic);
		}

		if (pic.trim().toLowerCase().endsWith(".gif"))
			return;
		if (!li.contains(pic))
			li.add(pic);
	}

	/**
	 * Cleans a picture URL that ends with a literal {@code <br>}: every
	 * {@code <br>} occurrence is removed and the result trimmed. Any other
	 * URL is returned unchanged.
	 *
	 * @param pic picture URL
	 * @return the cleaned URL
	 */
	private String clrPicUrl(String pic) {
		return pic.endsWith("<br>") ? pic.replace("<br>", "").trim() : pic;
	}

	/**
	 * Extracts the article URLs from a list page: every {@code a} element
	 * whose {@code href} contains {@code htm_data}, without duplicates and in
	 * document order.
	 *
	 * <p>Relative hrefs are prefixed with {@code startUrl}; a leading slash
	 * is not doubled. Hrefs that are already absolute
	 * ({@code http://} or {@code https://}) are kept as they are.
	 *
	 * @author attilax
	 * @param html HTML of the list page
	 * @return the article URLs
	 * @throws RuntimeException with message {@code noRzt} and the original
	 *         failure as its cause, if the page cannot be parsed
	 * @since p17 d_57_m
	 */
	public List getArtListByPagehtml(String html) {

		List<String> li = Lists.newArrayList();

		try {
			Document doc = Jsoup.parse(html);
			for (Element link : doc.getElementsByTag("a")) {
				String href = link.attr("href");
				if (!href.contains("htm_data")) {
					continue;
				}
				String artUrl;
				if (href.startsWith("http://") || href.startsWith("https://")) {
					artUrl = href;
				} else if (href.startsWith("/")) {
					artUrl = startUrl + href;
				} else {
					artUrl = startUrl + "/" + href;
				}
				if (!li.contains(artUrl))
					li.add(artUrl);
			}
		} catch (Exception e) {
			// keep the cause so the caller's stack trace shows what failed
			throw new RuntimeException("noRzt", e);
		}

		return li;

	}
//	public  String startUrl = "http://www.1024goto.com";
	/**
	 * Builds the list-page URLs for pages {@code startPage} to
	 * {@code endPage}, both inclusive, under {@code startUrl}.
	 *
	 * @author attilax
	 * @return one URL per page number, in ascending order; empty when
	 *         {@code startPage} is greater than {@code endPage}
	 * @since p17 d_55_h
	 */
	public List<String> getpageUrls() {
		String template = "@host@/thread0806.php?fid=16&search=&page=$p$"
				.replace("@host@", startUrl);

		List<String> pageUrls = Lists.newArrayList();
		int page = startPage;
		while (page <= endPage) {
			pageUrls.add(template.replace("$p$", String.valueOf(page)));
			page++;
		}
		return pageUrls;
	}

}
